## Summary of applications by treatment status and subgroup
##
## Cait Unkovic, Maya Sen, Kevin Quinn
##
## 2/15/2016
##

## Fix the RNG seed. No random draws are visible in this script; the seed
## is kept so that any that are added later stay reproducible.
set.seed(553920)




###################################################################
## Load the raw contact-level data and tidy it up
raw.file <- "gradcontacts_treatedcontrol_import.csv"
x <- read.csv(raw.file, stringsAsFactors=FALSE)

## assign the eight short working column names, in file order
colnames(x) <- c("univ", "sex", "ID", "treat", "applied", "accept",
                 "app.sex", "app.race")

## Collapse the acceptance outcome to 0/1.
## The original data had three levels but only used two:
##   0 = not accepted for any participation
##   2 = accepted for poster presentation
## so every 2 is recoded as 1.
x$accept <- replace(x$accept, x$accept==2, 1)

## 9 was entered during the hand merge of the datasets to mark someone who
## did not apply and therefore could not be accepted; treat it as 0.
x$accept <- replace(x$accept, x$accept==9, 0)


## Attach the school rank data: read the university list and copy each
## university's rank onto the matching rows of x as x$Dept.Rank.
univlist <- read.csv("university.list.csv",
                     stringsAsFactors=FALSE)
## keep only the first 53 rows (the rows after them hold a bad entry)
univlist <- univlist[1:53,]
## convert rank to numeric
univlist$u.rank <- as.numeric(univlist$u.rank)

## Look up every row's university in the list in one vectorized step.
## match() returns the position of the first matching entry, or NA when the
## university is absent from the list.
rank.row <- match(x$univ, univlist$univ)

## Fail loudly, naming the offenders, rather than letting unranked rows
## slip into the rank-tier subgroups further down.
if (anyNA(rank.row)) {
  stop("no rank found in university.list.csv for: ",
       paste(unique(x$univ[is.na(rank.row)]), collapse=", "))
}

x$Dept.Rank <- univlist$u.rank[rank.row]










## Empty results table: one row per subgroup, count columns filled in later.
## Rows come in blocks of three (everyone, men, women): first the full
## sample, then one block per department-rank tier.
tier.labels <- c("Top 10", "Top 11 to 25", "Top 26 to 50")
tier.blocks <- lapply(tier.labels, function(tier) {
  c(tier, paste("Men", tier), paste("Women", tier))
})
subgroup.labels <- c("Full Sample", "Men", "Women", unlist(tier.blocks))

output.table <- data.frame(subgroup=subgroup.labels,
                           n=NA,
                           n.treated=NA,
                           n.control=NA,
                           t.app=NA,
                           c.app=NA)






## Fill output.table with one row of counts per subgroup.
##
## The twelve subgroups are every combination of a department-rank tier
## (all / top 10 / 11 to 25 / 26 and up) with a sex filter
## (all / men / women), so each row is computed by the same helper from a
## pair of logical masks.

## count.subgroup: tabulate one subgroup of `dat`.
##   dat  - data frame with columns `treat` and `applied`
##   keep - logical vector, one entry per row of `dat`; TRUE marks members
## Returns a named list whose names match the count columns of output.table:
##   n          number of rows in the subgroup
##   n.treated  sum of treat (a count, assuming treat is coded 0/1)
##   n.control  number of rows with treat == 0
##   t.app      sum of applied over rows with treat == 1
##   c.app      sum of applied over rows with treat == 0
## which() drops NA entries of `keep`. Indexing a data frame with an NA
## logical produces an all-NA row, which would inflate n and turn every sum
## into NA; rows whose sex or rank is missing are therefore left out of the
## subgroups they cannot be placed in.
count.subgroup <- function(dat, keep) {
  members <- dat[which(keep), , drop=FALSE]
  is.treated <- members$treat == 1
  is.control <- members$treat == 0
  list(n=nrow(members),
       n.treated=sum(members$treat),
       n.control=sum(is.control),
       t.app=sum(members$applied[is.treated]),
       c.app=sum(members$applied[is.control]))
}

everyone <- rep(TRUE, nrow(x))

## Department-rank tiers, in table order.
## NOTE(review): the last tier has no upper bound, so any department ranked
## past 50 is counted in the "26 to 50" rows -- confirm this is intended.
rank.tiers <- list(everyone,
                   x$Dept.Rank <= 10,
                   x$Dept.Rank >= 11 & x$Dept.Rank <= 25,
                   x$Dept.Rank >= 26)

## Sex filters, in the order they repeat within each tier.
sex.groups <- list(everyone,
                   x$sex == "Male",
                   x$sex == "Female")

## Row labels, tier by tier (everyone, men, women within each tier).
row.labels <- c("Full Sample", "Men", "Women",
                "Top 10", "Men Top 10", "Women Top 10",
                "Top 11 to 25", "Men Top 11 to 25", "Women Top 11 to 25",
                "Top 26 to 50", "Men Top 26 to 50", "Women Top 26 to 50")

table.row <- 0
for (in.tier in rank.tiers) {
  for (in.sex in sex.groups) {
    table.row <- table.row + 1
    output.table$subgroup[table.row] <- row.labels[table.row]
    counts <- count.subgroup(x, in.tier & in.sex)
    ## assign column by column so each count keeps its own type
    for (stat in names(counts)) {
      output.table[[stat]][table.row] <- counts[[stat]]
    }
  }
}




## Store the finished summary table on disk.
save(list="output.table", file="ApplicationSummary.Rda")




library(xtable)

## Print the table through xtable, without row names.
## display/digits carry one entry per table column plus a leading entry for
## the row names: two string slots (row names, subgroup label) followed by
## the five count columns, shown as fixed-point with zero decimals.
col.display <- c("s", "s", rep("f", 5))
col.digits <- c(10, 10, rep(0, 5))
summary.xtable <- xtable(output.table,
                         display=col.display,
                         digits=col.digits)
print(summary.xtable, include.rownames=FALSE)
